In [ ]:
import polars as pl
import pathlib
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from itables import init_notebook_mode

pio.renderers.default = 'plotly_mimetype+notebook'
init_notebook_mode(all_interactive=True)
In [ ]:
path_data = pathlib.Path.cwd().parent / 'data'
path_input = path_data / 'input'
path_output = path_data / 'output'

path_df = path_output / 'HY1999-2016_20230713_P2A0.csv'
In [ ]:
df = pl.read_csv(str(path_df), infer_schema_length=6000)
df.head()
Out[ ]:
HarvestYear ID2 Longitude Latitude SampleID Crop GrainSampleArea_P1 GrainMassWet_P1 GrainMassWetInGrainSample_P1 GrainMassWetInGrainSample_P2 GrainMassOvenDryInGrainSample_P1 GrainMassOvenDryInGrainSample_P2 GrainMassAirDry_P1 GrainMassOvenDry_P1 GrainMassOvenDry_P2 GrainMoistureProportion_P2 GrainMoisture_P1 GrainProtein_P1 GrainStarch_P1 GrainWGlutDM_P1 GrainOilDM_P1 GrainTestWeight_P1 GrainCarbon_P1 GrainNitrogen_P1 GrainSulfur_P1 BiomassSampleArea_P1 BiomassWet_P1 BiomassAirDry_P1 GrainMassWetInBiomassSample_P1 GrainMassOvenDryInBiomassSample_P1 GrainMassOvenDryInBiomassSample_P2 ResidueMassWetSubsample_P1 ResidueMassOvenDrySubsample_P1 ResidueMoistureProportionSubsample_P2 ResidueCarbon_P1 ResidueNitrogen_P1 ResidueSulfur_P1 Comments_P1 GrainYieldWet_P2 GrainYieldAirDry_P2 GrainYieldOvenDry_P2 ResidueMassWet_P2 ResidueMassOvenDry_P2 ResidueMassWetPerArea_P2 ResidueMassOvenDryPerArea_P2 ResidueMassAirDry_P2 ResidueMassAirDryPerArea_P2
Loading... (need help?)

Data means¶

Here's a big'o'table of mean values for all the measurement columns. Means were calculated per year and per crop. Some means may not make sense to calculate, so use your discretion.

In [ ]:
non_metric_cols = ['HarvestYear', 'ID2', 'Longitude', 'Latitude', 'SampleID', 'Crop']
metric_cols = [x for x in df.columns if x not in non_metric_cols]

df_agg_yr = (
    df.groupby(['HarvestYear', 'Crop'])
    .agg(
        [pl.col(f'{c}').mean().alias(f'{c}_mean') for c in metric_cols]
    )
    .sort(['Crop', 'HarvestYear'])
) 

display(df_agg_yr)
HarvestYear Crop GrainSampleArea_P1_mean GrainMassWet_P1_mean GrainMassWetInGrainSample_P1_mean GrainMassWetInGrainSample_P2_mean GrainMassOvenDryInGrainSample_P1_mean GrainMassOvenDryInGrainSample_P2_mean GrainMassAirDry_P1_mean GrainMassOvenDry_P1_mean GrainMassOvenDry_P2_mean GrainMoistureProportion_P2_mean GrainMoisture_P1_mean GrainProtein_P1_mean GrainStarch_P1_mean GrainWGlutDM_P1_mean GrainOilDM_P1_mean GrainTestWeight_P1_mean GrainCarbon_P1_mean GrainNitrogen_P1_mean GrainSulfur_P1_mean BiomassSampleArea_P1_mean BiomassWet_P1_mean BiomassAirDry_P1_mean GrainMassWetInBiomassSample_P1_mean GrainMassOvenDryInBiomassSample_P1_mean GrainMassOvenDryInBiomassSample_P2_mean ResidueMassWetSubsample_P1_mean ResidueMassOvenDrySubsample_P1_mean ResidueMoistureProportionSubsample_P2_mean ResidueCarbon_P1_mean ResidueNitrogen_P1_mean ResidueSulfur_P1_mean Comments_P1_mean GrainYieldWet_P2_mean GrainYieldAirDry_P2_mean GrainYieldOvenDry_P2_mean ResidueMassWet_P2_mean ResidueMassOvenDry_P2_mean ResidueMassWetPerArea_P2_mean ResidueMassOvenDryPerArea_P2_mean ResidueMassAirDry_P2_mean ResidueMassAirDryPerArea_P2_mean
Loading... (need help?)
In [ ]:
def create_yield_comparison_plot(crop, grain_or_residue):

    # init with 'grain' values
    wet_var = 'GrainYieldWet_P2'
    air_var = 'GrainYieldAirDry_P2'
    oven_var = 'GrainYieldOvenDry_P2'

    if(grain_or_residue == 'residue'):
        wet_var = 'ResidueMassWetPerArea_P2'
        air_var = 'ResidueMassAirDryPerArea_P2'
        oven_var = 'ResidueMassOvenDryPerArea_P2'
        

    fig = go.Figure()

    crop_df = df.filter(pl.col('Crop') == crop)

    x = crop_df['HarvestYear'].to_list()

    # Start/End cutoffs for years were methods and/or data were different
    cutoff_a0 = 1999
    cutoff_a1 = 2010

    cutoff_b0 = 2011
    cutoff_b1 = 2012

    cutoff_c0 = 2013
    cutoff_c1 = 2016

    # Wet averages
    wet_mean_1999_2010 = crop_df.filter(pl.col('HarvestYear') <= cutoff_a1)[wet_var].mean()
    wet_mean_2011_2012 = crop_df.filter((pl.col('HarvestYear') > cutoff_a1) & (pl.col('HarvestYear') <= cutoff_b1))[wet_var].mean()
    wet_mean_2013_2016 = crop_df.filter(pl.col('HarvestYear') > cutoff_b1)[wet_var].mean()

    # Air dry averages
    air_mean_1999_2010 = crop_df.filter(pl.col('HarvestYear') <= cutoff_a1)[air_var].mean()
    air_mean_2011_2012 = crop_df.filter((pl.col('HarvestYear') > cutoff_a1) & (pl.col('HarvestYear') <= cutoff_b1))[air_var].mean()
    air_mean_2013_2016 = crop_df.filter(pl.col('HarvestYear') > cutoff_b1)[air_var].mean()

    # Oven dry averages
    oven_mean_1999_2010 = crop_df.filter(pl.col('HarvestYear') <= cutoff_a1)[oven_var].mean()
    oven_mean_2011_2012 = crop_df.filter((pl.col('HarvestYear') > cutoff_a1) & (pl.col('HarvestYear') <= cutoff_b1))[oven_var].mean()
    oven_mean_2013_2016 = crop_df.filter(pl.col('HarvestYear') > cutoff_b1)[oven_var].mean()

    # Box plots
    fig.add_trace(go.Box(
        x = x,
        y = crop_df[wet_var].to_list(),
        name = 'Wet',
        marker_color='blue'
    ))

    fig.add_trace(go.Box(
        x = x,
        y = crop_df[air_var].to_list(),
        name='Air',
        marker_color='green'
    ))

    fig.add_trace(go.Box(
        x = x,
        y = crop_df[oven_var].to_list(),
        name='Oven',
        marker_color = 'red'
    ))

    # Title, xaxis, add dashed v lines
    fig.update_layout(
        boxmode='group',
        title={'text':crop,
               'y':0.9,
               'x':0.5,
               'xanchor':'center',
               'yanchor':'top'},
        xaxis=dict(range=[1998,2017]))
    fig.add_vline(x = cutoff_a1 + 0.5, line_width = 3, line_dash='dash')
    fig.add_vline(x = cutoff_b1 + 0.5, line_width = 3, line_dash='dash')

    #Wet averages
    fig.add_shape(type='line',
                  x0=cutoff_a0,
                  x1=cutoff_a1,
                  y0=wet_mean_1999_2010,
                  y1=wet_mean_1999_2010,
                  xref='x',
                  yref='y',
                  line = dict(color='Blue'))
    fig.add_shape(type='line',
                  x0=cutoff_b0,
                  x1=cutoff_b1,
                  y0=wet_mean_2011_2012,
                  y1=wet_mean_2011_2012,
                  xref='x',
                  yref='y',
                  line = dict(color='Blue'))
    fig.add_shape(type='line',
                  x0=cutoff_c0,
                  x1=cutoff_c1,
                  y0=wet_mean_2013_2016,
                  y1=wet_mean_2013_2016,
                  xref='x',
                  yref='y',
                  line = dict(color='Blue'))
    
    #Wet averages
    fig.add_shape(type='line',
                  x0=cutoff_a0,
                  x1=cutoff_a1,
                  y0=air_mean_1999_2010,
                  y1=air_mean_1999_2010,
                  xref='x',
                  yref='y',
                  line = dict(color='Green'))
    fig.add_shape(type='line',
                  x0=cutoff_b0,
                  x1=cutoff_b1,
                  y0=air_mean_2011_2012,
                  y1=air_mean_2011_2012,
                  xref='x',
                  yref='y',
                  line = dict(color='Green'))
    fig.add_shape(type='line',
                  x0=cutoff_c0,
                  x1=cutoff_c1,
                  y0=air_mean_2013_2016,
                  y1=air_mean_2013_2016,
                  xref='x',
                  yref='y',
                  line = dict(color='Green'))
    
    #Oven averages lines
    fig.add_shape(type='line',
                  x0=cutoff_a0,
                  x1=cutoff_a1,
                  y0=oven_mean_1999_2010,
                  y1=oven_mean_1999_2010,
                  xref='x',
                  yref='y',
                  line = dict(color='Red'))
    fig.add_shape(type='line',
                  x0=cutoff_b0,
                  x1=cutoff_b1,
                  y0=oven_mean_2011_2012,
                  y1=oven_mean_2011_2012,
                  xref='x',
                  yref='y',
                  line = dict(color='Red'))
    fig.add_shape(type='line',
                  x0=cutoff_c0,
                  x1=cutoff_c1,
                  y0=oven_mean_2013_2016,
                  y1=oven_mean_2013_2016,
                  xref='x',
                  yref='y',
                  line = dict(color='Red'))

    fig.show()
In [ ]:
crops = list(set(df['Crop'].to_list()))

Box plots¶

The following figures show box plots of each crop across the years.

Vertical dashed lines indicate a change of methods (and/or available data) from the previous year to the next. That is, the way the biomass and/or grain was dried, and what data were recorded, changed.

The horizontal lines indicate mean values for the years within the vertical lines. E.g. In the below image, the blue horizontal line that spans the years 1999 - 2009 has the y-value of the means value for wet grain yield (g/m2).

The point of these figures is to get a feel of the variability of the data across years and between measurement methods.

(oh, and ignore AL, it's all funky)

Grain yields (g/m2)¶

In [ ]:
for crop in crops:
    create_yield_comparison_plot(crop, 'grain')

Residue yields (g/m2)¶

In [ ]:
for crop in crops:
    create_yield_comparison_plot(crop, 'residue')